package com.webmagic.demo;

import lombok.Getter;
import lombok.Setter;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.model.ConsolePageModelPipeline;
import us.codecraft.webmagic.model.OOSpider;
import us.codecraft.webmagic.model.annotation.ExtractBy;
import us.codecraft.webmagic.model.annotation.TargetUrl;

import java.util.List;

/**
 * Annotation-driven page model for blog pages whose URL matches the
 * {@code @TargetUrl} pattern declared on this class.
 *
 * <p>Each field carries the {@code @ExtractBy} rule used to fill it in; the
 * getters and setters are generated by Lombok ({@code @Getter}/{@code @Setter}).
 * Running {@link #main(String[])} starts a crawl from the blog index URL and
 * hands extracted instances to a {@link ConsolePageModelPipeline}.
 *
 * <p>Created 2018/4/17, 1:06 AM.
 *
 * @author zhanglu
 */
@Getter
@Setter
@TargetUrl("http://my.oschina.net/flashsword/blog/\\d+")
public class OschinaBlog {

    // Title text, located through an absolute XPath from the document root.
    // NOTE(review): an absolute path like this depends on the exact page layout.
    @ExtractBy("/html/body/div/div[1]/div[3]/div[5]/div[1]/div[1]/text()")
    private String title;

    // Post body, selected with a CSS selector instead of the default rule type.
    @ExtractBy(value = "div.BlogContent", type = ExtractBy.Type.Css)
    private String content;

    // Link texts under the element with class "BlogTags"; multi = true so the
    // results are collected into a list.
    @ExtractBy(value = "//div[@class='BlogTags']/a/text()", multi = true)
    private List<String> tags;

    /**
     * Builds a spider for this page model with a default {@link Site} and a
     * console pipeline, seeds it with the blog index URL, and runs it.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        String startUrl = "http://my.oschina.net/flashsword/blog";
        Site site = Site.me();
        ConsolePageModelPipeline pipeline = new ConsolePageModelPipeline();

        OOSpider.create(site, pipeline, OschinaBlog.class)
                .addUrl(startUrl)
                .run();
    }
}
